/***
This do-file creates a table comparing industry shares for the QCEW, 
small businesses from the SUSB, Paychex-Intuit, and Earnin.
***/

*-------------------------------------------------------------------------------
* Set up
*-------------------------------------------------------------------------------

* Set $root 
project figstabs, root
if (r(buildrunning)==0) include "${root}/code/config_interactive.do"

* Set globals
project, uses("${root}/code/set_globals.do")
include "${root}/code/set_globals.do"
local category "Employment"

* Create required subfolders
cap mkdir "${root}/results/`category'"
cap mkdir "${root}/results/paper numbers"
cap mkdir "${root}/results/paper numbers/`category'"

* Erase output numbers
cap erase "${root}/results/paper numbers/`category'/Change in Employment Rates to April by State Paychex-Intuit-Earnin vs. CES.yaml"
cap erase "${root}/results/paper numbers/`category'/Change in Employment Rates to April by 2-Digit NAICS Code Paychex-Intuit-Earnin vs. CES.yaml"

*-------------------------------------------------------------------------------
* QCEW
*-------------------------------------------------------------------------------

project, uses("${root}/data/dvc/Industry Shares of Employment/2019q1_national_est_size_x_industry.dta")
use "${root}/data/dvc/Industry Shares of Employment/2019q1_national_est_size_x_industry.dta", clear

*Clean
keep industry_code size_code month1_emplvl month2_emplvl month3_emplvl

*naics formatting
tostring industry_code, replace
replace industry_code = substr(industry_code, 1, 2)
replace industry_code = "31-33" if inlist(industry_code, "31", "32", "33")
replace industry_code = "44-45" if inlist(industry_code, "44", "45")
replace industry_code = "48-49" if inlist(industry_code, "48", "49")

*Take Average of Monthly Employment
gegen avg_employment = rowmean(month1_emplvl month2_emplvl month3_emplvl)


*Indicator for Above/Below 50 Employees
gen above_50 = size_code >= 5

*Collapse to Industry x Size
gcollapse (rawsum) avg_employment, by(industry_code above_50)

*Get industry shares
bys above_50: gegen total_employment = total(avg_employment)
gen qcew_employment_share = 100*(avg_employment/total_employment)

keep industry_code above_50 qcew_employment_share
*Save shares

rename qcew_employment_share qcew_employment_share_

reshape wide qcew_employment_share_, i(industry_code) j(above_50)
rename qcew_employment_share_0 qcew_small_employment_share
rename qcew_employment_share_1 qcew_large_employment_share

tempfile qcew_share
save `qcew_share'

*Aggregated
project, uses("${root}/data/dvc/Industry Shares of Employment/2019q1_national_est_size_x_industry.dta")
use "${root}/data/dvc/Industry Shares of Employment/2019q1_national_est_size_x_industry.dta", clear

*Clean
keep industry_code size_code month1_emplvl month2_emplvl month3_emplvl

*naics formatting
tostring industry_code, replace
replace industry_code = substr(industry_code, 1, 2)
replace industry_code = "31-33" if inlist(industry_code, "31", "32", "33")
replace industry_code = "44-45" if inlist(industry_code, "44", "45")
replace industry_code = "48-49" if inlist(industry_code, "48", "49")

*Take Average of Monthly Employment
gegen avg_employment = rowmean(month1_emplvl month2_emplvl month3_emplvl)

*Collapse to Industry x Size
gcollapse (rawsum) avg_employment, by(industry_code)

*Get industry shares
gegen total_employment = total(avg_employment)
gen qcew_all_employment_share = 100*(avg_employment/total_employment)

keep industry_code qcew_all_employment_share

merge 1:1 industry_code using `qcew_share'

rename industry_code naics

keep naics qcew*

tempfile qcew
save `qcew'

*-------------------------------------------------------------------------------
* Paychex - Intuit
*-------------------------------------------------------------------------------
*import paychex
project, uses("${root}/data/dvc/Industry Shares of Employment/Paychex-Intuit Industry Shares.dta")
use "${root}/data/dvc/Industry Shares of Employment/Paychex-Intuit Industry Shares.dta", clear 
decode naics, gen(new) 
replace new = string(naics) if new == ""
drop naics
rename new naics
rename prop paychex_intuit_ind_share

tempfile paychex 
save `paychex'

*-------------------------------------------------------------------------------
* Earnin 
*-------------------------------------------------------------------------------

* Keep nonmissing private industries
project, uses("${root}/data/dvc/Earnin/historical/Earnin employment BY zip and industry.dta")
use if !inlist(naics, 0, 92) using "${root}/data/dvc/Earnin/historical/Earnin employment BY zip and industry.dta", clear

* Merge counties 
preserve
	project, uses("${root}/data/dvc/ACS 2014-2018 5-Year ZCTA/Area/zcta_county_rel_10.txt")
	import delimited "${root}/data/dvc/ACS 2014-2018 5-Year ZCTA/Area/zcta_county_rel_10.txt", encoding(ISO-8859-2) clear
	sort zcta5 poppt
	bysort zcta5 (poppt): keep if _n == _N                                      // only keeps the county where most of the zcta lives
	gen county_fips = 1000 * state + county
	keep zcta5 county_fips
	rename zcta5 zip 
	tempfile zcta_county_CW
	save `zcta_county_CW'
restore

merge m:1 zip using `zcta_county_CW', keep(1 3) nogen

* Drop geographies outside the 50 states + DC 
gen state_fips = floor(county_fips / 1000)
drop if state_fips == 72 | missing(state_fips)

* Date
gen date = week * 7 + 21546
format date %td

gcollapse (sum) emp if month(date) == 1 & year(date) == 2020, by(naics)
gegen total_emp = sum(emp)
gen earnin_ind_share = (emp / total_emp) * 100

label drop _all
tostring naics, replace force
replace naics = "31-33" if naics == "31"
replace naics = "44-45" if naics == "44"
replace naics = "48-49" if naics == "48"
assert !mi(naics)

keep earnin_ind_share naics

*-------------------------------------------------------------------------------
* Combine all data
*-------------------------------------------------------------------------------

merge 1:1 naics using `paychex', nogen
merge 1:1 naics using `qcew', nogen

*Shorten descriptions
rename naics naics2
project, uses("${root}/data/dvc/Industry Shares of Employment/naics_2digit_bg_cw.dta")
merge2 1:1 naics2 using "${root}/data/dvc/Industry Shares of Employment/naics_2digit_bg_cw.dta", nogen keepusing(industry_desc) keep(1 3)
rename naics2 naics
replace industry_desc = "Unclassified" if mi(industry_desc)

* drop QCEW large establishments since it's now in the paper:
drop qcew_large_employment_share 

rename industry_desc industry_description
order naics industry_description qcew* paychex* earnin*

* export 
replace paychex_intuit_ind_share = paychex_intuit_ind_share * 100
ds naics industry_description, not
foreach var in `r(varlist)' {
	gen double `var'2 = round(`var', 0.01)
	drop `var'
	rename `var'2 `var'
	tostring `var', force replace
	replace `var' = "" if `var' == "."
	replace `var' = subinstr(`var', ".", "0.", .) if strpos(`var', ".") == 1
}

export excel "${root}/results/new_app_table_3.xlsx", sheet(new_app_table_3, replace) firstrow(var)
project, creates("${root}/results/new_app_table_3.xlsx")
